library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'monocle3'
## The following objects are masked from 'package:Biobase':
## 
##     exprs, fData, fData<-, pData, pData<-
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'sp'
## The following object is masked from 'package:IRanges':
## 
##     %over%
## 'SeuratObject' was built under R 4.3.1 but the current version is
## 4.3.2; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.6.3 but the current
## version is 1.6.4; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
## 
## Attaching package: 'SeuratObject'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     Assays
## The following object is masked from 'package:GenomicRanges':
## 
##     intersect
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following object is masked from 'package:IRanges':
## 
##     intersect
## The following object is masked from 'package:S4Vectors':
## 
##     intersect
## The following object is masked from 'package:BiocGenerics':
## 
##     intersect
## The following object is masked from 'package:base':
## 
##     intersect
## 
## Attaching package: 'Seurat'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     Assays
options(Seurat.object.assay.version = "v3")

data <- readRDS(file='./data.RDS')

args <- commandArgs(trailingOnly = TRUE)
args
## [1] "2000" "30"   "Y_14"
n_features = as.numeric(args[1])
n_pcs = as.numeric(args[2])

data <- NormalizeData(object = data, verbose = FALSE)
data <- FindVariableFeatures(object = data, nfeatures = n_features, verbose = FALSE, selection.method = 'vst')
data <- ScaleData(data, verbose = FALSE)
data <- RunPCA(data, npcs = n_pcs, verbose = FALSE)
data <- FindNeighbors(data, dims = 1:n_pcs)
## Computing nearest neighbor graph
## Computing SNN
data <- RunUMAP(data, reduction = "pca", dims = 1:n_pcs)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 22:11:28 UMAP embedding parameters a = 0.9922 b = 1.112
## Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
## Also defined by 'spam'
## 22:11:28 Read 73258 rows and found 30 numeric columns
## 22:11:28 Using Annoy for neighbor search, n_neighbors = 30
## Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
## Also defined by 'spam'
## 22:11:28 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:11:32 Writing NN index file to temp file /var/folders/q7/0v1pdn252719pyv5pjr1k_r80000gn/T//RtmpQ8WoCF/file13b5ea3c5abe
## 22:11:32 Searching Annoy index using 1 thread, search_k = 3000
## 22:11:45 Annoy recall = 100%
## 22:11:46 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 22:11:47 Initializing from normalized Laplacian + noise (using RSpectra)
## 22:11:48 Commencing optimization for 200 epochs, with 3180426 positive edges
## 22:12:09 Optimization finished
data@active.assay = 'RNA'
DimPlot(data, group.by = c("day"))

data$dayint <- data[[]]$day
data$dayint <- ifelse(data$dayint == "iPSC", 20, data$dayint)
data$dayint <- as.numeric(data$dayint)

FeaturePlot(data, "dayint")

cds <- SeuratWrappers::as.cell_data_set(data) #change to cds here
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object
cds <- cluster_cells(cds)

plot_cells(cds, show_trajectory_graph = TRUE,label_principal_points = TRUE,
           color_cells_by = "partition")
## No trajectory to plot. Has learn_graph() been called yet?
## Cannot label principal points when no trajectory to plot. Has learn_graph() been called yet?

cds <- learn_graph(cds, use_partition = FALSE) #graph learned across all partitions
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
plot_cells(cds, show_trajectory_graph = TRUE,label_principal_points = TRUE,
           color_cells_by = "partition")
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cds
## class: cell_data_set 
## dim: 27998 73258 
## metadata(1): citations
## assays(2): counts logcounts
## rownames(27998): Xkr4 Gm1992 ... Vmn2r122 CAAA01147332.1
## rowData names(0):
## colnames(73258): AAACCTGCAATGACCT-1_1 AAACCTGGTCTGCAAT-1_1 ...
##   TTTGTCAAGTCGTTTG-1_46 TTTGTCACAAACGCGA-1_46
## colData names(8): orig.ident nCount_RNA ... ident Size_Factor
## reducedDimNames(2): PCA UMAP
## mainExpName: RNA
## altExpNames(0):
root_pr_node = as.character(args[3])
cds <- order_cells(cds,
  reduction_method = "UMAP",
  root_pr_nodes = root_pr_node,
  verbose = FALSE)
plot_cells(cds, color_cells_by = "pseudotime", label_branch_points=FALSE, label_leaves=FALSE)
## Cells aren't colored in a way that allows them to be grouped.